<?php
	/**
	 * Return the raw, unfiltered content (HTML included) found at $url.
	 *
	 * @param string $url  Path or URL; anything the PHP stream wrappers can open.
	 * @return string      The complete content, or "" when the resource cannot be
	 *                     opened or read.
	 */
	function String_html($url)
	{
		// file_get_contents() reads the whole stream and reports failure as false.
		// The previous fopen()/feof()/fgets() loop never checked the handle, so a
		// failed open ended in an endless loop (or a TypeError on PHP 8).
		$OutString = file_get_contents($url);
		if($OutString === false)
		{
			return "";
		}
		return $OutString;
	}
	
	/**
	 * Fetch $url and return its text with the HTML tags filtered out.
	 *
	 * After strip_tags() one known leftover fragment is removed as well
	 * (presumably the tail of an inline image-resize handler that strip_tags()
	 * does not recognise as part of a tag - confirm against a live page).
	 *
	 * @param string $url  Location handed to String_html().
	 * @return string      The tag-free text.
	 */
	function String_GetAllHtml($url)
	{
		$rawHtml   = String_html($url);
		$plainText = strip_tags($rawHtml);
		$leftover  = "screen.width-333)this.width=screen.width-333\">";

		return str_replace($leftover, "", $plainText);
	}
	
	/**
	 * Return the tag-free text of $url with the known boilerplate strings removed.
	 *
	 * On top of String_GetAllHtml() this drops fixed navigation/footer phrases,
	 * collapses runs of 2-4 newlines into one, and wraps the four section labels
	 * in <font class = ...> tags; the "--" marker receives the closing tags.
	 *
	 * @param string $url  Location handed to String_GetAllHtml().
	 * @return string      The cleaned-up text.
	 */
	function String_Strip($url)
	{
		// search => replacement. Order matters: str_replace() applies the pairs
		// one after another, each on the result of the previous pair.
		$substitutions = array(
			"screen.width-333)this.width=screen.width-333\">" => "",
			"我爱南开站 -- 同主题阅读 [讨论区: BookStall]" => "",
			"[查看全文] [回复本文] [回复作者] " => "",
			", 信区: BookStall" => "",
			"※ 来源:·我爱南开站 nkbbs.org ·Web" => "",
			" [讨论区:BookStall]" => "",
			"返回上一页" => "",
			"\n\n\n\n" => "\n",
			"\n\n\n" => "\n",
			"\n\n" => "\n",
			"【书籍名称】" => "<font class = c32>【书籍名称】",
			"【书籍简介】" => "<font class = c35>【书籍简介】",
			"【价    位】" => "<font class = c33>【价    位】",
			"【联系方式】" => "<font class = c36>【联系方式】",
			"--" => "</font></font></font></font>--"
		);

		$text = String_GetAllHtml($url);
		$text = str_replace(array_keys($substitutions), array_values($substitutions), $text);
		// Disabled IP-address masking, kept from the original for reference:
		//$text = preg_replace( "/(d+).(d+).(d+).(d+)/","ip",$text);
		return $text;
	}
	
	/**
	 * Return the largest article number shown on the board's listing page.
	 *
	 * The listing is requested without a "start" parameter, the article table is
	 * cut out of the HTML, and the last numeric cell found in it is returned.
	 *
	 * @param string $boardName  Board name, inserted into the listing URL as-is.
	 * @return int|string        The last number matched (a digit string), or 0
	 *                           when the page could not be fetched or parsed.
	 */
	function int_all_article($boardName)
	{
		$Lastpage_url = "http://bbs.nankai.edu.cn/cgi-bin/bbs/bbstdoc?board=".$boardName."&chart=13";

		$s_find = "/<table align=center width=600 border=0 cellpadding=1 cellspacing=1><tr bgColor=d5aad0><td align=center width=30>(.)*<\/td><\/table>/";
		$s_html = String_html($Lastpage_url);

		// Only the first table match was ever used, so preg_match() is enough;
		// $matches_table[0] then holds the HTML of the article table.
		if(!preg_match( $s_find ,$s_html, $matches_table))
		{
			return 0;
		}

		// $matches_tr[1] collects the article numbers found in the table cells.
		// NOTE(review): the table pattern uses "bgColor=d5aad0" while this one
		// uses "bgColor=#d5aad0" - confirm both forms really occur in the page.
		if(!preg_match_all( "/bgColor=#d5aad0>([0-9]+)<\/td>/", $matches_table[0], $matches_tr))
		{
			return 0;
		}

		return $matches_tr[1][count($matches_tr[1])-1];
	}
	
	/**
	 * Store the articles of one listing page in `book_original_data`.
	 *
	 * Fetches the listing that starts at offset $int, extracts the article ids,
	 * skips the leading pinned entries, and inserts every article whose URL is
	 * not in the table yet.
	 *
	 * @param string $boardName       Board name used in the listing/article URLs.
	 * @param int    $int             Value of the listing's "start" parameter.
	 * @param int    $all_into_mysql  Running total, increased (by reference) by
	 *                                the number of rows inserted in this call.
	 * @return string                 HTML progress text: one "N..<br>" per article
	 *                                processed, followed by a summary line.
	 */
	function writeToMysql($boardName,$int,&$all_into_mysql)		// writes page by page into the database
	{
		// Listing page of the board (the summary below assumes 20 posts per page).
		$url = "http://bbs.nankai.edu.cn/cgi-bin/bbs/bbstdoc?board=".$boardName."&chart=13"."&start=".$int;
		$s_find = "/<table align=center width=600 border=0 cellpadding=1 cellspacing=1><tr bgColor=d5aad0><td align=center width=30>(.)*<\/td><\/table>/";
		$s_html = String_html($url);
		preg_match_all( $s_find ,$s_html, $matches_table);		// HTML of the article table ends up in $matches_table[0]

		$matches_tr = array();
		$count = 0;
		if(!empty($matches_table[0]))
		{
			// $matches_tr[1] receives the BBS ids of the posts. The board name is
			// quoted so that regex metacharacters in it cannot alter the pattern.
			preg_match_all( "/bbstcon\?board=".preg_quote($boardName, "/")."\&file=M\.([0-9]+)\.A/", $matches_table[0][0], $matches_tr);
			// $count = number of "image" occurrences, used as the number of pinned posts.
			str_replace("image", "", $matches_table[0], $count);
		}
		$article_ids = isset($matches_tr[1]) ? $matches_tr[1] : array();
		$article_total = count($article_ids);

		$db = new data_class;
		$count_do_this_for = 0;
		$ToBeReturn = "";		// progress text handed back to the caller
		for($i = $count ; $i < $article_total ; $i++ )
		{
			$now_url = "http://bbs.nankai.edu.cn/cgi-bin/bbs/bbstcon?board=".$boardName."&file=M.".$article_ids[$i].".A";
			$ToBeReturn .= ($i-$count+1)."..<br>";

			// Look the URL up first so that articles which are already stored are
			// not downloaded again.
			$tem_r = $db->query("SELECT * FROM `book_original_data` WHERE `url`='".addslashes($now_url)."'");
			if(isset($tem_r[0]) && $tem_r[0]->url != "")
			{
				continue;
			}

			$writeInToMysql_string = String_GetAllHtml($now_url);
			if(preg_match("/(\d{4})年(\d{2})月(\d{2})日(\d{2}):(\d{2}):(\d{2})/",$writeInToMysql_string,$time_parts))
			{
				$now_time = $time_parts[1]."-".$time_parts[2]."-".$time_parts[3]." ".$time_parts[4].":".$time_parts[5].":".$time_parts[6];
			}
			else
			{
				// No timestamp in the article text: store a zero date instead of
				// building the malformed "-- ::" from undefined offsets.
				$now_time = "0000-00-00 00:00:00";
			}

			// The article text is scraped from the web and may contain quotes or
			// backslashes, so it must be escaped before being placed in the SQL.
			// NOTE(review): addslashes() assumes MySQL's default backslash escaping
			// and a single-byte/UTF-8 connection charset; prefer a prepared
			// statement or the connection's own escaping if data_class offers one.
			$sql = "INSERT INTO `book_original_data` (`id`,`url`,`content`,`time`) VALUES ('null','".addslashes($now_url)."','".addslashes($writeInToMysql_string)."','".$now_time."')";
			$db->execute($sql);
			$count_do_this_for++;
		}
		$all_into_mysql += $count_do_this_for;
		return ($ToBeReturn.($int."到".($int+20)."个完成。<br>共计写入数据库".$count_do_this_for."个数据"));
	}
	
	/**
	 * Walk through every listing ("bridge") page of a board and store all of its
	 * articles in one run.
	 *
	 * The page count is derived from the largest article number reported by
	 * int_all_article(), at 20 articles per page.
	 *
	 * @param string $boardName  Board to harvest.
	 * @return int               Total number of rows written by writeToMysql().
	 */
	function DealEachBrigePageOnce($boardName)
	{
		$int_all = int_all_article($boardName);	// largest article number on the board
		$int_brige_page = (int)($int_all / 20) ;

		// writeToMysql() takes its running total by reference as a required third
		// argument; leaving it out (as before) fails with an ArgumentCountError
		// on PHP 7.1+.
		$all_into_mysql = 0;
		for($i = 0 ; $i<=$int_brige_page ; $i++)
		{
			writeToMysql($boardName,($i*20),$all_into_mysql);
		}
		return $all_into_mysql;
	}
?>